#########################################
## Load and analyze 15-Fold model 
## estimation to assess genrealization
## error. 
#########################################

## Load estimation results
load("./GenErrorsKFoldFull.RData")
load("./GenErrorsKFoldForen.RData")
load("./GenErrorsKFoldContext.RData")

## Collect all BART estimations in a list
errors.gen  <- list(full.errors.boot
                    ,foren.errors.boot
                    ,context.errors.boot)

## Obtain mean and median individual prediction errors across 
##  15 folds, for each of the three types of model
out.mean <- laply(errors.gen,function(x){
  daply(x$ind.errors,"X1",function(y){
    apply(y[,c(3,2,4)],2,mean)
    })
  }
  )

out.median <- laply(errors.gen,function(x){
  daply(x$ind.errors,"X1",function(y){
    apply(y[,c(2,4)],2,median)
    })
  }
  )

## Bind arrays of mean and median errors                                                                        
out <- abind(out.mean,out.median)

## For each error measure, find model rankings 
## within each fold 
out <- aaply(out,3,function(x)apply(x,2,rank))

## Obtain mean rankings across 15 folds
## for each model
out <- aaply(out,3,colMeans)                                                                                                       

## How often does the full model (i.e. column 1) perform better than
## any of the other two?
full.best <- sum(apply(out,1,function(x)which.min(x)==1))
cat("Full model is best in",full.best,"out of 15 folds")

## Consensus rankings
cons.ranks  <- round(colMeans(out),2)
cat("Consensus ranks 15-Fold GenError:\nFull model: ",cons.ranks[1]
    ,"\nForensics: ", cons.ranks[2]
    ,"\nContext-only: ", cons.ranks[3]
)

